### RECORD INDIVIDUAL LEVEL TUNEOUT DECISIONS

library(tidyverse)
library(parallel)
library(lubridate)
library(hms)
library(broom)
library(ggthemes)
library(data.table)
library(magrittr)

### SET WORKING DIRECTORY HERE ###
# Every file path used further down is relative to this directory.
path_to_archive <- "replication/"
setwd(path_to_archive)


# DMA -> time zone lookup, used for time zone adjustments.
# load() is expected to bring an object called `dma_timezone` into scope.
load("data/dma_timezone.RData")

# Keep only the two columns that are needed and drop rows without a DMA code.
dma_timezone <- select(dma_timezone, dma_code, timezone)
dma_timezone <- filter(dma_timezone, !is.na(dma_code))

# Translate the short zone labels into Olson time zone names; any label that
# is not listed here is left untouched by recode().
dma_timezone <- mutate(
	dma_timezone,
	timezone = recode(timezone, ETZ = "America/New_York", CTZ = "America/Chicago", PTZ="America/Los_Angeles")
)

# The joins further down use data.table syntax.
dma_timezone <- as.data.table(dma_timezone)

### THESE FILES ARE PROPRIETARY (FROM FWM DATA)

# Compact YYYYMMDD stamp shared by the FWM file names.
#   date: a Date (or date-time) vector; ISO "YYYY-MM-DD" strings are also
#         accepted and converted with as.Date() first.
# Returns a character vector the same length as `date`.
.fwm_date_stamp <- function(date) {
	if (is.character(date)) {
		date <- as.Date(date)
	}
	format(date, "%Y%m%d")
}

# Path of the gzipped raw event file for `date`.
rawfile_name <- function(date) {
	paste0("data/FWM/raw/rawxml/FWM_", .fwm_date_stamp(date), "_R.pd.gz")
}

# Path of the device reference file for `date`.
reffile_name <- function(date) {
	paste0("data/FWM/ref_data/fwm_ref_data_", .fwm_date_stamp(date), ".RData")
}


# Read one day of raw set-top-box events and convert them into viewing
# intervals.
#
# Args:
#   d: the day to load. It is passed to rawfile_name() / reffile_name() and is
#      used as the date of the synthetic end-of-day events.
#
# Returns:
#   A data.table with one row per tune ("T") event. Besides the raw fields and
#   the device's dma_code / zipcode / timezone it carries
#     event_time_utc      start of the interval (UTC),
#     event_time_utc_end  time of the device's next event (UTC), moved forward
#                         to start + 18000s when the interval is longer,
#     dur                 interval length in seconds, capped at 18000.
#
# Reads the global `dma_timezone` lookup table.
read_raw <- function(d) {
	cat("\tReading raw data...\n")
	raw_view <- d %>% rawfile_name %>%
		fread(
			sep="|",
			col.names = c("mso", "device_id", "event_date", "event_time", "event_type", "event_value", "event_name", "event_id"),
			colClasses="character"
		) %>%
		setnames(old=c("event_name", "event_value"), new=c("channel", "channel_num"))

	cat("\tConverting times and dates...\n")
	raw_view[,event_date := ymd(event_date)]
	# event_time arrives as an "HHMMSS" string
	raw_view[,event_time := hms(hours=as.numeric(substr(event_time,1,2)), minutes=as.numeric(substr(event_time, 3,4)), seconds=as.numeric(substr(event_time, 5,6)))]

	# channel number 65532 is treated as an "O" event; every non-tune event is
	# labelled OFF
	raw_view[channel_num == "65532", event_type := "O"]
	raw_view[event_type != "T", channel := "OFF"]

	# device -> DMA / zipcode / time zone
	# NOTE(review): the reference file name ends in .RData but is read with
	# readRDS(); confirm the files really are RDS-serialised.
	ref_day <- reffile_name(d) %>% readRDS %>%
		as.data.table %>%
		.[,c("device_id","dma_code","zipcode")] %>%
		.[dma_timezone, on="dma_code"]

	raw_view <- ref_day[raw_view,on="device_id"]

	# replace missing dmas with LEXINGTON KY (541)
	raw_view[is.na(dma_code), dma_code:="541"]
	raw_view[is.na(timezone), timezone:="America/New_York"]

	# local wall-clock time -> UTC, one pass per recognised time zone
	raw_view[timezone == "America/New_York", event_time_utc := ymd_hms(paste(event_date, event_time, sep=" "), tz="America/New_York") %>% with_tz("UTC")]
	raw_view[timezone == "America/Chicago", event_time_utc := ymd_hms(paste(event_date, event_time, sep=" "), tz="America/Chicago") %>% with_tz("UTC")]
	raw_view[timezone == "America/Los_Angeles", event_time_utc := ymd_hms(paste(event_date, event_time, sep=" "), tz="America/Los_Angeles") %>% with_tz("UTC")]
	# anything still unconverted (e.g. an unrecognised time zone label) is
	# interpreted as New York time
	raw_view[is.na(event_time_utc), event_time_utc := ymd_hms(paste(event_date, event_time, sep=" "), tz="America/New_York") %>% with_tz("UTC")]


	cat("\tAppending end-of-day events...\n")
	# 23:59:59 local time on day d, expressed in UTC, for each recognised zone
	end_of_day <- data.table(
		event_date = d,
		event_time = hms(hours=rep(23,3),minutes=rep(59,3),seconds=rep(59,3)),
		timezone = c("America/New_York", "America/Chicago", "America/Los_Angeles"))
	end_of_day[, event_time_utc := imap(timezone, ~ ymd_hms(paste(event_date[.y], event_time[.y], sep=" "), tz = .x) %>% with_tz("UTC")) %>% reduce(c) ]


	# one synthetic "END" event per device, so that the last real event of the
	# day gets an end time
	every_dev_end <- raw_view[!duplicated(device_id),.(device_id,dma_code,zipcode,timezone,mso,event_id)]
	every_dev_end[,event_type:="E"]
	every_dev_end[,event_id:=paste(device_id, "END", sep="_")]
	every_dev_end[,channel_num:=0]
	every_dev_end[,channel:="END"]

	every_dev_end <- end_of_day[every_dev_end, on="timezone"]

	# Devices whose time zone label is not one of the three above find no match
	# in end_of_day. Their events were converted as New York time, so give
	# their END marker the New York end of day as well; otherwise the marker
	# has an NA timestamp and the device's last interval of the day is dropped
	# below.
	fallback_end <- end_of_day[timezone == "America/New_York"]
	every_dev_end[is.na(event_time_utc), `:=` (
		event_date = fallback_end$event_date,
		event_time = fallback_end$event_time,
		event_time_utc = fallback_end$event_time_utc)]

	raw_view <- rbind(raw_view, every_dev_end)
	raw_view <- raw_view[order(device_id, event_time_utc),]

	cat("\tGenerating viewing intervals...\n")
	# an interval runs from an event to the same device's next event
	raw_view[, event_time_utc_end := lead(event_time_utc), by = .(device_id)]
	raw_view <- raw_view[!is.na(event_time_utc_end)]

	## cap segment time at 5h (18000s), ~ 99th percentile
	raw_view[event_type=="T", dur := time_length(event_time_utc_end - event_time_utc, unit="second")]
	raw_view[dur > 18000, `:=` (dur = 18000, event_time_utc_end = event_time_utc + dseconds(18000)) ]

	# only tune events are returned
	raw_view[event_type=="T"]
}


# Count real and placebo ("random") tune-outs for one ad among its T group.
#
# Args:
#   t_c:             data.table with (at least) columns device_id and group;
#                    only rows with group == "T" are used.
#   ad_time:         air time of the ad; the ad is taken to occupy
#                    (ad_time, ad_time + ad_dur seconds].
#   ad_dur:          ad length in seconds.
#   tuning:          data.table of viewing intervals with columns device_id,
#                    event_time_utc and event_time_utc_end. It is not modified.
#   window_restrict: minutes; only the first window_restrict minutes of each
#                    viewing interval are eligible for the placebo draw.
#
# Returns:
#   A one-row tibble with
#     n_t             number of rows of t_c in the T group,
#     tuneout_real    devices with a tune event starting during the ad,
#     tuneout_random  devices whose randomly drawn second lies within ad_dur
#                     seconds of the end of an untruncated viewing interval.
get_tune_out <- function(t_c, ad_time, ad_dur, tuning, window_restrict=60) {
	# the join returns a new table, so the := calls below do not touch `tuning`
	tune <- tuning[t_c[group=="T"], on="device_id",nomatch=0]

	# exclusion window for the placebo draw: 30 min before the ad to end of ad
	excl_start <- ad_time - minutes(30)
	excl_end <- ad_time + seconds(ad_dur)

	# get actual tune-outs during ad by T group
	tune_outs_actual <- tune %>%
		.[event_time_utc > ad_time & event_time_utc <= excl_end] %>%
		setkey(device_id, event_time_utc) %>%
		unique(by="device_id")

	# random tune outs. First exclude 30 min prior to ad to end of ad.
	# tune_at_end records whether an interval still ends at its real end
	# (TRUE) or was cut short by one of the truncations below (FALSE).
	tune[,tune_at_end := TRUE]

	# case 1: starts before the window and ends inside it
	#         -> set end time to start of window
	tune[event_time_utc < excl_start &
		 event_time_utc_end > excl_start &
		 event_time_utc_end <= excl_end,
		 `:=` (event_time_utc_end = excl_start, tune_at_end = FALSE)]

	# case 2: starts inside the window and ends after it
	#         -> set begin time to end of window
	tune[event_time_utc > excl_start &
		 event_time_utc < excl_end &
		 event_time_utc_end > excl_end,
		 `:=` (event_time_utc = excl_end, tune_at_end = TRUE)]

	# case 3: starts after begin of window and ends before end of window
	#         -> exclude entirely
	tune <- tune[!(event_time_utc > excl_start &
				   event_time_utc_end <= excl_end)]

	# case 4: starts before the window and ends after it
	#         -> split into the piece before and the piece after the window
	spans_window <- tune[event_time_utc < excl_start &
						 event_time_utc_end > excl_end]

	piece_before <- copy(spans_window)
	piece_before[,`:=` (event_time_utc_end = excl_start, tune_at_end = FALSE)]
	piece_after <- copy(spans_window)
	piece_after[,`:=` (event_time_utc = excl_end, tune_at_end = TRUE)]

	tune <- rbind(tune[!(event_time_utc < excl_start &
				   event_time_utc_end > excl_end)],
				   piece_before, piece_after)

	# sample from remaining times at random
	# limit to times with activity in past window_restrict minutes
	# then sample uniformly over seconds where box is active
	tune_outs_random <- tune %>%
		.[,event_length := time_length(event_time_utc_end - event_time_utc, unit="second")] %>%
		.[event_time_utc_end > event_time_utc + minutes(window_restrict), `:=` (event_time_utc_end = event_time_utc + minutes(window_restrict), event_length = window_restrict*60, tune_at_end=FALSE)]

	# A device whose remaining intervals have no positive total length offers
	# nothing to sample from, and sample.int() would abort on its all-zero
	# weights. Drop such devices; all other devices are sampled exactly as
	# before, in the same order.
	sampleable <- tune_outs_random[, .(total_length = sum(event_length)), by = .(device_id)][total_length > 0, device_id]
	tune_outs_random <- tune_outs_random[device_id %in% sampleable]

	if(nrow(tune_outs_random) > 0){
		tune_outs_random %<>%
			# one interval per device, drawn with probability proportional to length
			.[,.SD[sample.int(.N, 1, prob=event_length)], by = .(device_id)] %>%
			# one second within the chosen interval, uniform on 1..event_length
			.[,random_s := map_int(event_length, sample.int, size=1)] %>%
			# placebo tune-out: interval ends for real within ad_dur of the draw
			.[tune_at_end & random_s >= event_length - ad_dur]
	}

	tibble(n_t = nrow(t_c[group=="T"]), tuneout_real = nrow(tune_outs_actual), tuneout_random =nrow(tune_outs_random))
}

# day loop
#
# Compute real and placebo tune-out counts for every ad of one day.
#
# Args:
#   date: the day to process; used to build the input and output file names.
#
# Returns:
#   The ad-level table read from data/dd/tc_groups_<date>.rds, without its t_c
#   list column and with the columns returned by get_tune_out() appended.
#   The same table is written to data/tuneout/ads_dd_tuneout_<date>.rds.
tuneout_day <- function(date) {
	cat(as.character(date), "\n")

	# make sure the output directory exists before the expensive work starts,
	# so that saveRDS() at the end cannot fail on a missing folder
	out_path <- paste0("data/tuneout/ads_dd_tuneout_", as.character(date), ".rds")
	dir.create(dirname(out_path), showWarnings = FALSE, recursive = TRUE)

	# load treatment / control group assignment (by device)
	# keep only ads that have at least one device in the T group
	t_c_df <- readRDS(paste0("data/dd/tc_groups_", as.character(date), ".rds")) %>%
		filter(map_int(t_c, ~sum(.$group=="T")) > 0)

	# load daily viewing data
	# for the union of devices in all T groups
	cat("\tReading tuning data...\n")
	tune_day <- read_raw(date)

	devices <- t_c_df$t_c %>%
		rbindlist %>%
		.[group=="T"] %>%
		unique(by="device_id") %>%
		.[,.(device_id)]

	tune_day %<>% .[devices, on="device_id", nomatch=0]

	# loop over ads: for each ad, pull T group
	# get indicator for tuning out during ad air time
	# compare to randomly selected time when tuned in
	cat("\tLooping over ads...\n")
	out <- t_c_df %>%
			mutate(tune_out = pmap(list(t_c=t_c, ad_time=event_time_utc),
				   get_tune_out,
				   tuning=tune_day, ad_dur = 30, window_restrict=20)) %>%
			select(-t_c) %>%
			unnest(cols=tune_out)

	out %>% saveRDS(out_path)

	out
}

# Fix the RNG state so that the random draws are reproducible across runs.
set.seed(-3498255)

# One entry per calendar day of the sample period.
dates <- seq(from = mdy("09/01/2012"), to = mdy("11/06/2012"), by = "days")

# Process the days in order and stack the per-day results row-wise.
tuneout_all <- map_dfr(dates, tuneout_day)

save(tuneout_all, file = "data/ads_dd_tuneout_all.RData")